In [9]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from scipy.signal import argrelmax, argrelmin
%matplotlib inline
In [10]:
def data_path(year, d_type='results'):
code = os.getcwd().split("/")
root = os.path.join("/", *code[:-1])
if d_type == 'results':
the_path = os.path.join(root, 'datasets', str(year), 'result_sets')
elif d_type == 'groundtruth':
the_path = os.path.join(root, 'datasets', str(year), 'ground_truth')
elif d_type == 'outputs':
the_path = os.path.join(root, 'datasets', str(year), 'outputs')
assert os.path.isdir(the_path), 'no such path exists: {0}'.format(the_path)
return the_path
def competition_files(year, d_type='results', ext='csv'):
root = data_path(year, d_type)
orig_dir = os.getcwd()
os.chdir(root)
file_struct = os.walk(root)
comp_files = []
for path, dirs, files in file_struct:
for file in files:
if ext == file.split(".")[-1]:
comp_files.append(os.path.join(path, file))
os.chdir(orig_dir)
return comp_files
def competition_results(year, header=0):
comp_results = {}
comp_files = competition_files(year)
songs_used = []
for filename in comp_files:
comp_results[filename.split("/")[-1]] = pd.read_csv(filename, header=header)
        # normalize the 'filename' values in the 2009 result sets
if year == 2009:
comp_results[filename.split("/")[-1]]['filename'] = comp_results[filename.split("/")[-1]]['filename'].apply(lambda x: x.split(".")[0].lower().replace("__", "_-_"))
songs_used = songs_used + comp_results[filename.split("/")[-1]].filename.unique().tolist()
else:
songs_used = songs_used + comp_results[filename.split("/")[-1]].File.unique().tolist()
try:
songs_used.remove('ave')
songs_used.remove('weighted ave')
except ValueError:
pass
return comp_results, np.array(list(set(songs_used)))
def songs_by_score(songs_and_sums):
return sorted([(song, score) for song, score in songs_and_sums.items()], key=lambda x: x[1])
def summed_overlaps(comp_results, songs_used, col="Overlap_Score", file="filename"):
    # Go through each submitted algorithm's results:
songs = dict(zip(songs_used, [0]*len(songs_used)))
for algorithm in comp_results:
# For this algorithm, check all songs
for idx, row in comp_results[algorithm].iterrows():
songs[row[file]] = songs[row[file]] + row[col]
return songs
def show_top_bottom_ten(overlap_sums, year):
t = list(overlap_sums.items())
t.sort(key=lambda x: x[1])
plt.figure(figsize=(15,5))
#plot bottom 10:
plt.plot(range(10), [score for song, score in t[:10]])
#plot top 10:
plt.plot(range(10, 20), [score for song, score in t[-10:]])
plt.ylabel("Total overlap, year {0}".format(year))
plt.xlabel("Song")
song_labels = [song for song, score in t[:10]] + [song for song, score in t[-10:]]
#plot song labels for bottom & top ten:
plt.xticks(range(20), song_labels, rotation='vertical')
plt.title("Lowest and Highest 10 Songs, year {0}".format(year))
plt.show()
return t
def show_all_scores(overlap_sums, year):
values = np.array(list(overlap_sums.values()))
plt.figure(figsize=(20,20))
plt.plot(range(len(overlap_sums)), values)
plt.ylabel("Total overlap, year {0}".format(year))
plt.xlabel("Song")
plt.title("Total Overlap Score for All Songs, year {0}".format(year))
plt.show()
In [11]:
results_2009, songs_2009 = competition_results(2009)
overlap_sums_2009 = summed_overlaps(results_2009, songs_2009)
sorted_song_totals_2009 = show_top_bottom_ten(overlap_sums_2009, 2009)
Here we have both the top 10 songs by overall score, as well as the bottom 10. Scores here are totalled across all submitted algorithms.
The next question to answer is, do these songs have anything in common?
To investigate this we will examine the ground truth files for each of the 10 lowest and highest scorers. We will also look at the top and bottom 10 scorers in other years on the same dataset. The reasoning is that if there is something unique about a song that makes its chords difficult to predict, that difficulty will persist across time. If not, then it can be concluded to be a failing of the 2009 submissions.
Unfortunately, MIREX changed how it runs the competition after 2009. They claim to be using the same dataset; however, all of the songs have now been relabeled with names like 'chord_mrx_09_000001', making it impossible to determine whether the same songs have the lowest overlap score in subsequent years. The overlap score itself is also no longer provided.
In [12]:
def compare_all_algo_overlaps(competition_results, songs, year, col="Overlap_Score"):
plt.figure(figsize=(60,20))
for result_set in competition_results.values():
#iter through each algo
#plot this algo's overlap score.
#get the overlap scores for this algo in a numpy array:
        overlaps = result_set[col].to_numpy()  # .as_matrix() is removed in newer pandas
end = overlaps.shape[0]
plt.plot(range(end), overlaps)
plt.ylabel("Total overlap, year {0}".format(year))
plt.xlabel("Song")
plt.title("Total Overlap Score for All Songs, year {0}".format(year))
plt.xticks(range(end), [song for song in songs], rotation='vertical')
plt.show()
compare_all_algo_overlaps(results_2009, songs_2009, 2009)
What we observe here is that many of the peaks and valleys are shared across submissions. In other words, these differing algorithms all have trouble or ease with the same songs, generally speaking.
Let's find out what songs are a local minimum for all 13 submissions at the same time.
In [13]:
def bad_score_by_most_submissions(competition_results, songs, year, cutoff):
min_idxes = []
song_idx = []
for result_set in competition_results.values():
algos_min_idxes = argrelmin(np.array(list(result_set.Overlap_Score.tolist())))[0]
for idx in algos_min_idxes:
min_idxes.append(idx)
for idx in min_idxes:
if min_idxes.count(idx) >= cutoff and idx not in song_idx:
song_idx.append(idx)
return songs[song_idx]
In [14]:
common_bad_songs_2009 = bad_score_by_most_submissions(results_2009,
songs_2009,
2009,
13)
for song in common_bad_songs_2009:
print(song)
Let's investigate whether these songs have something in common, and whether that is why most of the submitted algorithms did poorly on them.
In [18]:
def ground_truth(year):
ground_truth = {}
truth_files = competition_files(year, d_type='groundtruth', ext='lab')
for filename in truth_files:
ground_truth[filename.split("/")[-1].split(".")[0].lower().replace("\'", "")] = pd.read_csv(filename, sep=" ",
names=['onset', 'offset', 'chord'])
return ground_truth
In [19]:
ground_truth_2009 = ground_truth(2009)
In [21]:
#print(common_bad_songs_2009[0])
#ground_truth_2009['01_']
#ground_truth_2009[common_bad_songs_2009[0]]
#result = pd.concat([df.chord for df in ground_truth_2009.values()], axis=1)
In [22]:
#result
I see a lot of chords in minor keys, and I am beginning to wonder whether this has something to do with it.
I could look into the average proportion of minor, major, diminished, sharpened, and numerically modified chords (e.g. 7ths), and then check whether these 5 songs differ in some way from the norm; a rough sketch of such a count follows.
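As a minimal sketch (not run here), chord qualities in the ground-truth labels could be tallied like this, assuming ground_truth_2009 is the dict of DataFrames loaded above and that its keys line up with the names in common_bad_songs_2009 (the key format may need aligning):
In [ ]:
import re
from collections import Counter
def chord_quality_counts(gt_frames, songs=None):
    '''Tally rough chord qualities (maj/min/dim/aug plus numbered extensions)
    in the ground-truth labels of the given songs (or of all songs).'''
    counts = Counter()
    for name, df in gt_frames.items():
        if songs is not None and name not in songs:
            continue
        for chord in df['chord']:
            chord = str(chord)
            if chord in ('N', 'X'):
                counts['no-chord'] += 1
            elif ':min' in chord:
                counts['min'] += 1
            elif ':dim' in chord:
                counts['dim'] += 1
            elif ':aug' in chord:
                counts['aug'] += 1
            else:
                counts['maj'] += 1
            if re.search(r'\d', chord):
                # 7ths, 9ths, added notes, etc.
                counts['numbered'] += 1
    return counts
#hypothetical usage -- compare the five 'bad' songs against the full set:
#print(chord_quality_counts(ground_truth_2009, set(common_bad_songs_2009)))
#print(chord_quality_counts(ground_truth_2009))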
In [23]:
#What is the average score of each algorithm? (overlap score)
def algorithms_ave(results):
averages = []
for algorithm in results.keys():
averages.append((algorithm, results[algorithm].loc[results[algorithm]['filename'] == 'ave'].Overlap_Score.tolist()[0]))
return sorted(averages, key=lambda x: x[1], reverse=True)
In [24]:
averages_2009 = algorithms_ave(results_2009)
In [25]:
# What is the highest-averaged algorithm?
averages_2009[0]
Out[25]:
In [26]:
# How much better is the best average?
averages_2009[1]
Out[26]:
In [27]:
# Whats the average when you choose the best algorithm for each song?
def best_algo(results, songs):
test = []
for song in songs:
test.append([])
for algo in results.keys():
try:
test[-1].append(results[algo].loc[results[algo]['filename'] == song].Overlap_Score.tolist()[0])
except IndexError:
pass
return [max(rank) for rank in test]
highest_rankings = best_algo(results_2009, songs_2009)
# the average you get if you always choose the best algorithm for each song.
print(np.average(highest_rankings))
Is there any way to predict which algorithm will perform best on a given song? Only the overlap values (not the per-song outputs) are available for 2009, so I cannot look into this for that year. I'll try 2011.
For the remainder of this project, only the major/minor vocabulary will be considered (MIREX decided to use multiple vocabularies -- essentially multiple ways to round chord approximations -- for the results from this year forward).
They further decided to add songs to the dataset and to relabel all of the songs, making cross-comparison with 2009 impossible.
In [28]:
results_2011, songs_2011 = competition_results(2011, header=1)
In [29]:
# what columns do we have?
print(*results_2011['CB3.csv'].columns.values, sep=", ", end="")
In [30]:
# What is the average pairwise score for each algorithm?
def pairwise_averages(submissions, year):
averages = []
for submission in submissions.keys():
averages.append((submission, np.average(submissions[submission]['Pairwise score (%)'])))
return sorted(averages, key=lambda x: x[1], reverse=True)
In [31]:
# Why is SB8.csv so bad?
results_2011['SB8.csv']
Out[31]:
In [32]:
results_2015, songs_2015 = competition_results(2015, header=1)
In [33]:
print(*results_2015['CM3.csv'].columns.values, sep=", ", end="")
In [34]:
print("2011")
pairwise_ave_2011 = pairwise_averages(results_2011, 2011)
print("Average pairwise score, by submission:\n")
for team, ave in pairwise_ave_2011:
print("\t", team, ": ", ave)
print("\nAverage pairwise score, overall:\n")
print(np.average([ave for team, ave in pairwise_ave_2011]))
print("\n\n2015")
pairwise_ave_2015 = pairwise_averages(results_2015, 2015)
print("Average pairwise score, by submission:\n")
for team, ave in pairwise_ave_2015:
print("\t", team, ": ", ave)
print("\nAverage pairwise score, overall\n")
print(np.average([ave for team, ave in pairwise_ave_2015]))
Here we see that the highest average pairwise score didn't increase at all from 2011 to 2015, and that the overall average across all submissions increased by a mere 0.9%. Finally, we see that this is due to the worst submission from 2011 (SB8.csv) not resubmitting for the 2015 contest. In other words, the algorithms didn't get better; instead, the worst one was simply excluded.
In [35]:
compare_all_algo_overlaps(results_2015, songs_2015, 2015, col='Pairwise score (%)')
In [36]:
overlap_sums_2011 = summed_overlaps(results_2011, songs_2011, col='Pairwise score (%)', file="File")
sorted_song_totals_2011 = show_top_bottom_ten(overlap_sums_2011, 2011)
overlap_sums_2015 = summed_overlaps(results_2015, songs_2015, col='Pairwise score (%)', file="File")
sorted_song_totals_2015 = show_top_bottom_ten(overlap_sums_2015, 2015)
In [37]:
def common_songs(yearx, yeary):
common_songs = []
songsx = [song for song, score in yearx]
songsy = [song for song, score in yeary]
for song in songsx:
if song in songsy:
common_songs.append(song)
return common_songs
In [38]:
print(common_songs(sorted_song_totals_2015[:10], sorted_song_totals_2011[:10]))
Here we see that 8 of the bottom 10 songs (by total pairwise score) persist from 2011 to 2015. This suggests that either these songs are inherently difficult, or the submissions are overfit to the dataset.
Here I will compare the ground truth file with the output of each submission, and look for common mistakes.
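One way to sketch such a comparison (a hypothetical helper, not taken from the competition code): sample the ground-truth labels and each submission's output labels on a fixed time grid and tally the disagreeing (truth, guess) pairs. This assumes each submission's output for a song sits in a per-algorithm folder under the 'outputs' directory with a '.wav.txt' extension and the same 'onset offset chord' layout as the ground-truth '.lab' files; the real paths and extensions may differ.
In [ ]:
from collections import Counter
def common_mistakes(truth_dir, output_dir, algos, song, ext='.wav.txt', step=0.1):
    '''Sample the ground truth and each submission's output for one song on a
    fixed time grid and count the (truth, guess) label pairs that disagree.'''
    def load_lab(path):
        rows = []
        with open(path) as fh:
            for line in fh:
                parts = line.split()
                if len(parts) == 3:
                    rows.append((float(parts[0]), float(parts[1]), parts[2]))
        return rows
    def label_at(rows, t):
        for onset, offset, chord in rows:
            if onset <= t < offset:
                return chord
        return 'N'
    truth = load_lab(os.path.join(truth_dir, song + '.lab'))
    mistakes = Counter()
    end_time = truth[-1][1]
    for algo in algos:
        guess = load_lab(os.path.join(output_dir, algo, song + ext))
        t = 0.0
        while t < end_time:
            true_label, guessed_label = label_at(truth, t), label_at(guess, t)
            if true_label != guessed_label:
                mistakes[(true_label, guessed_label)] += 1
            t += step
    return mistakes
#hypothetical usage on one of the persistently low-scoring songs:
#mistakes = common_mistakes(data_path(2015, 'groundtruth'), data_path(2015, 'outputs'),
#                           ['CM3', 'DK4', 'DK5'], 'chord_mrx_09_000001')
#print(mistakes.most_common(10))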
In [39]:
from sklearn.metrics import accuracy_score
In [40]:
from sklearn.tree import DecisionTreeClassifier
In [41]:
clf = DecisionTreeClassifier()
In [42]:
# need to make the X matrix of features.
# features need to be numbers -- here we use a hash of the chord string.
# the chords of a song will make up the row of a matrix.
# (as they are listed in the ground truth)
def x_matrix(truth_files):
    '''Return the feature matrix: one row per song, containing the hashed
    ground-truth chord labels for that song (rows are zero-padded to the
    length of the longest song).'''
file_base_name = 'chord_mrx_09_{:0>6}.lab'
X = []
longest_row = 0
chords = []
assert os.path.isdir(truth_files), 'no such dir:\n{0}'.format(truth_files)
for song_number in range(217):
#iter through all songs
chords = []
f_file = os.path.join(truth_files, file_base_name.format(song_number))
assert os.path.isfile(f_file), 'no such file:\n{0}'.format(f_file)
#print(f_file)
with open(f_file) as infile:
# iter through all chords in the song
for line in infile.readlines():
chords.append(line.split()[-1])
X.append([hash(chord) for chord in chords])
longest_row = max(longest_row, len(chords))
return squared_matrix(X, longest_row)
def squared_matrix(M, n):
for row in M:
while (len(row) < n):
row.append(0)
return np.matrix(M)
In [43]:
truth_files = data_path(2015, d_type='groundtruth')
X = x_matrix(truth_files)
X.shape
Out[43]:
In [44]:
def get_Y(year_results):
Y = []
#algos = ['CM3.csv', 'DK4.csv', 'DK5.csv', 'DK6.csv',
# 'DK7.csv', 'DK8.csv', 'DK9.csv', 'KO1.csv']
algos = list(year_results.keys())
for x in range(217):
# iter the songs
song = 'chord_mrx_09_{:0>6}'.format(x)
#print(song)
best = ['', -1]
for algo in algos:
# select the best performing algo on this song
df = year_results[algo]
#print('ALGO:', algo)
score = df.loc[df['File'] == song]['Pairwise score (%)'].tolist()[0]
#print("ALGO:", algo, '--', score)
if score > best[-1]:
# this was a better scoring algo -- replace the current leading
# pair with this one:
best = [algo, score]
Y.append(hash(best[0])) # append the hash of the best scoring algo to Y
return Y
In [45]:
Y = get_Y(results_2015)
#training the classifier on the first 100 songs:
clf.fit(X[:100], Y[:100])
#
#testing on the last 117 songs in the set:
Y_pred = clf.predict(X[100:])
accuracy_score(Y[100:], Y_pred)
Out[45]:
In [46]:
# This is a pretty good guess. Random would be 0.125, since there are 8 algorithms.
# what would the average be if you chose the best at every song?
def get_best_scores(year_results):
Y = []
algos = list(year_results.keys())
for x in range(217):
# iter the songs
song = 'chord_mrx_09_{:0>6}'.format(x)
#print(song)
best = ['', -1]
for algo in algos:
# select the best performing algo on this song
df = year_results[algo]
#print('ALGO:', algo)
score = df.loc[df['File'] == song]['Pairwise score (%)'].tolist()[0]
#print("ALGO:", algo, '--', score)
if score > best[-1]:
# this was a better scoring algo -- replace the current leading
# pair with this one:
best = [algo, score]
        Y.append(best[-1]) # append the best score to Y
return Y
np.average(get_best_scores(results_2015))
Out[46]:
In [47]:
# This is actually not that much better than the best algorithm by itself.
# What this means is that, for the most part, one algorithm is consistently the best
# across all songs.
In [48]:
from sklearn import svm
svm_clf = svm.SVC()
svm_clf.fit(X[:100], Y[:100])
y_pred_svm = svm_clf.predict(X[100:])
accuracy_score(Y[100:], y_pred_svm)
Out[48]:
In [49]:
from sklearn.ensemble import RandomForestClassifier
rnd_forest_clf = RandomForestClassifier()
rnd_forest_clf.fit(X[:100], Y[:100])
y_pred_rnd = rnd_forest_clf.predict(X[100:])
accuracy_score(Y[100:], y_pred_rnd)
Out[49]:
In [50]:
# So, with SVM we can guess which algorithm will give the best results 76% of the time.
# this is really high considering random is 12.5%
Perhaps we can look at the average length of a chord name: in the ground truth files, and in the submitted outputs for the 'bad' songs, the 'good' songs, and overall.
Here we would consider longer chord names to mean more complicated, and therefore more unusual, chords.
In [51]:
print(common_songs(sorted_song_totals_2015[:10], sorted_song_totals_2011[:10]))
In [52]:
def ave_chord_length(root, songs, ext='.lab', algos=None):
    '''Return (average chord-name length, song duration) pairs for each song
    in the set; the durations are used later as weights when averaging.'''
avgs_weights = []
assert os.path.isdir(root), 'no such dir: {0}'.format(root)
if algos is None:
for song in songs:
# iter through all songs
song_file = os.path.join(root, song + ext)
assert os.path.isfile(song_file), 'no such file: {0}'.format(song_file)
with open(song_file) as stream:
song_data = stream.read().split()
lengths = list([len(chord) for a, b, chord in zip(*[iter(song_data)]*3)])
#print(lengths)
average = np.average(lengths)
weight = float(song_data[-2])
avgs_weights.append((average, weight))
else:
# do as above but average over all algos
for song in songs:
# iter through all songs
for algo in algos:
# iter through each algo's output for this song.
algo_avgs = []
song_file = os.path.join(root, algo, song + ext)
assert os.path.isfile(song_file), 'no such file: {0}'.format(song_file)
with open(song_file) as stream:
song_data = stream.read().split()
lengths = list([len(chord) for a, b, chord in zip(*[iter(song_data)]*3)])
average = np.average(lengths)
weight = float(song_data[-2])
algo_avgs.append(average)
avgs_weights.append((np.average(algo_avgs), weight))
return avgs_weights
In [53]:
avg_and_weights_groundtruth_2015 = ave_chord_length(truth_files, songs_2015)
avg_chord_len_groundtruth_2015 = np.average([avg for avg, weight in avg_and_weights_groundtruth_2015],
weights=[weight for avg, weight in avg_and_weights_groundtruth_2015])
# weighted average chord-name length from the 2015 ground truth:
avg_chord_len_groundtruth_2015
Out[53]:
In [54]:
output_files = data_path(2015, d_type='outputs')
avg_and_weights_2015 = ave_chord_length(output_files, songs_2015, ext=".wav.txt",
algos=['CM3', 'DK4', 'DK5', 'DK6', 'DK7', 'DK8', 'DK9', 'KO1'])
avg_chord_len_2015 = np.average([avg for avg, weight in avg_and_weights_2015],
weights=[weight for avg, weight in avg_and_weights_2015])
#overall average across all song outputs from all submissions:
avg_chord_len_2015
Out[54]:
In [55]:
bad_songs = common_songs(sorted_song_totals_2015[:10], sorted_song_totals_2011[:10])
good_songs = common_songs(sorted_song_totals_2015[-10:], sorted_song_totals_2011[-10:])
In [56]:
bad_avg_and_weights_2015 = ave_chord_length(output_files, bad_songs, ext=".wav.txt",
algos=['CM3', 'DK4', 'DK5', 'DK6', 'DK7', 'DK8', 'DK9', 'KO1'])
bad_avg_chord_len_2015 = np.average([avg for avg, weight in bad_avg_and_weights_2015],
weights=[weight for avg, weight in bad_avg_and_weights_2015])
#overall average across all 'bad' song outputs from all submissions:
bad_avg_chord_len_2015
Out[56]:
In [57]:
good_avg_and_weights_2015 = ave_chord_length(output_files, good_songs, ext=".wav.txt",
algos=['CM3', 'DK4', 'DK5', 'DK6', 'DK7', 'DK8', 'DK9', 'KO1'])
good_avg_chord_len_2015 = np.average([avg for avg, weight in good_avg_and_weights_2015],
weights=[weight for avg, weight in good_avg_and_weights_2015])
#overall average across all 'good' song outputs from all submissions:
good_avg_chord_len_2015
Out[57]:
There isn't much difference in average guessed chord-name length between the bottom ten, the top ten, and the full set of songs. There is, however, a significant difference between the average guessed chord length and the average actual chord length: approximately a 75% increase. This suggests that, overall, the algorithms believe songs to be much more complicated than they usually are.
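As a quick sanity check of that figure, the ratio can be computed directly from the two averages above:
In [ ]:
# ratio of the submissions' average guessed chord-name length to the
# ground-truth average; per the paragraph above this should be roughly 1.75
ratio = avg_chord_len_2015 / avg_chord_len_groundtruth_2015
print("guessed / ground-truth average chord-name length: {:.2f}".format(ratio))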
In [ ]: